rm(list = ls(all = TRUE))
library(dplyr)
library(ggplot2)
library(randomForest)
library(inTrees)
set.seed(1)
data <- rbind(c("1", "1", "C0"), c("1", "0", "C1"), c("0", "1", "C1"), c("0", 
    "0", "C1"), c("0", "0", "C0"), c("0", "0", "C0"), c("0", "0", "C0")) %>% 
    as.data.frame()
colnames(data) <- c("X1", "X2", "Classs")


rf <- randomForest(Classs ~ ., data = data, keep.inbag = TRUE)
X <- data[, c("X1", "X2")]
target <- data$Classs

imp <- as.data.frame(rf$importance)
colnames(imp)[colnames(imp) == "MeanDecreaseGini"] <- "importance"
imp <- imp[order(imp$importance, decreasing = FALSE), , drop = FALSE]
imp$feature <- rownames(imp)
imp$feature <- factor(imp$feature, levels = as.character(imp$feature))
theme_set(theme_gray(base_size = 18))
ggplot(data = imp, aes(x = feature, y = importance)) + geom_bar(stat = "identity", 
    aes(factor(feature)), fill = "red") + theme(axis.title.y = element_blank(), 
    axis.text.y = element_text(hjust = 1, size = 15)) + coord_flip()

rm(list = ls(all = TRUE))
library(arules)
library(randomForest)
library(RRF)
library(inTrees)
library(reshape)
library(ggplot2)
library(RCurl)
set.seed(1)

data <- read.csv(text = getURL("https://raw.githubusercontent.com/shuailab/ind_498/master/resource/data/AD.csv"))

target_indx <- which(colnames(data) == "DX_bl")
target <- paste0("class_", as.character(data[, target_indx]))
rm_indx <- which(colnames(data) %in% c("DX_bl", "ID", "TOTAL13", "MMSCORE"))
X <- data
X <- X[, -rm_indx]
for (i in 1:ncol(X)) X[, i] <- as.factor(dicretizeVector(X[, i], K = 3))

rf <- randomForest(X, as.factor(target))

treeList <- RF2List(rf)  # transform rf object to an inTrees' format
exec <- extractRules(treeList, X)  # R-executable conditions
## 4555 rules (length<=6) were extracted from the first 100 trees.
class <- paste0("class_", as.character(target))
rules <- getRuleMetric(exec, X, target)
print(rules[order(as.numeric(rules[, "len"])), ][1:5, ])
##      len freq    err     condition                                 
## [1,] "2" "0.118" "0.098" "X[,6] %in% c('L1') & X[,11] %in% c('L1')"
## [2,] "2" "0.182" "0"     "X[,4] %in% c('L1') & X[,6] %in% c('L1')" 
## [3,] "2" "0.182" "0"     "X[,4] %in% c('L1') & X[,6] %in% c('L1')" 
## [4,] "2" "0.081" "0.024" "X[,3] %in% c('L3') & X[,4] %in% c('L3')" 
## [5,] "2" "0.043" "0.136" "X[,6] %in% c('L3') & X[,7] %in% c('L3')" 
##      pred     
## [1,] "class_1"
## [2,] "class_1"
## [3,] "class_1"
## [4,] "class_0"
## [5,] "class_0"
rules.pruned <- pruneRule(rules, X, target, maxDecay = 0.005, typeDecay = 2)

length <- data.frame(original = as.numeric(rules[, "len"]), pruned = as.numeric(rules.pruned[, 
    "len"]))
ggplot(melt(length), aes(value, fill = variable)) + geom_histogram(position = "dodge", 
    binwidth = 0.4) + ggtitle("Histogram of Lengths") + theme(plot.title = element_text(hjust = 0.5))

frequency <- data.frame(original = as.numeric(rules[, "freq"]), pruned = as.numeric(rules.pruned[, 
    "freq"]))
ggplot(melt(frequency), aes(value, fill = variable)) + geom_histogram(position = "dodge", 
    binwidth = 0.05) + ggtitle("Histogram of Frequencies") + theme(plot.title = element_text(hjust = 0.5))

error <- data.frame(original = as.numeric(rules[, "err"]), pruned = as.numeric(rules.pruned[, 
    "err"]))
ggplot(melt(error), aes(value, fill = variable)) + geom_histogram(position = "dodge", 
    binwidth = 0.01) + ggtitle("Histogram of Errors") + theme(plot.title = element_text(hjust = 0.5))

rules.pruned <- pruneRule(rules, X, target, maxDecay = 0.05, typeDecay = 1)

length <- data.frame(original = as.numeric(rules[, "len"]), pruned = as.numeric(rules.pruned[, 
    "len"]))
ggplot(melt(length), aes(value, fill = variable)) + geom_histogram(position = "dodge", 
    binwidth = 0.4) + ggtitle("Histogram of Lengths") + theme(plot.title = element_text(hjust = 0.5))

frequency <- data.frame(original = as.numeric(rules[, "freq"]), pruned = as.numeric(rules.pruned[, 
    "freq"]))
ggplot(melt(frequency), aes(value, fill = variable)) + geom_histogram(position = "dodge", 
    binwidth = 0.05) + ggtitle("Histogram of Frequencies") + theme(plot.title = element_text(hjust = 0.5))

error <- data.frame(original = as.numeric(rules[, "err"]), pruned = as.numeric(rules.pruned[, 
    "err"]))
ggplot(melt(error), aes(value, fill = variable)) + geom_histogram(position = "dodge", 
    binwidth = 0.01) + ggtitle("Histogram of Errors") + theme(plot.title = element_text(hjust = 0.5))

rules.selected <- selectRuleRRF(rules.pruned, X, target)
rules.present <- presentRules(rules.selected, colnames(X))
print(cbind(ID = 1:nrow(rules.present), rules.present[, c("condition", "pred")]))
##       ID  
##  [1,] "1" 
##  [2,] "2" 
##  [3,] "3" 
##  [4,] "4" 
##  [5,] "5" 
##  [6,] "6" 
##  [7,] "7" 
##  [8,] "8" 
##  [9,] "9" 
## [10,] "10"
## [11,] "11"
## [12,] "12"
## [13,] "13"
## [14,] "14"
## [15,] "15"
## [16,] "16"
##       condition                                                                                                               
##  [1,] "FDG %in% c('L1','L2') & HippoNV %in% c('L1')"                                                                          
##  [2,] "FDG %in% c('L1') & HippoNV %in% c('L1','L2')"                                                                          
##  [3,] "PTGENDER %in% c('L2') & FDG %in% c('L2') & AV45 %in% c('L1','L2') & rs3818361 %in% c('L2') & rs3851179 %in% c('L2')"   
##  [4,] "AGE %in% c('L3') & FDG %in% c('L1') & HippoNV %in% c('L1','L2')"                                                       
##  [5,] "PTEDUCAT %in% c('L1') & AV45 %in% c('L2') & HippoNV %in% c('L2') & rs610932 %in% c('L2')"                              
##  [6,] "HippoNV %in% c('L1') & rs3818361 %in% c('L1')"                                                                         
##  [7,] "AV45 %in% c('L3') & HippoNV %in% c('L1','L2') & e4_1 %in% c('L2')"                                                     
##  [8,] "AV45 %in% c('L1','L2') & HippoNV %in% c('L2') & e4_1 %in% c('L2') & rs3764650 %in% c('L1')"                            
##  [9,] "AGE %in% c('L1') & PTGENDER %in% c('L2') & FDG %in% c('L2') & AV45 %in% c('L1','L2') & HippoNV %in% c('L1')"           
## [10,] "AGE %in% c('L3') & PTGENDER %in% c('L1') & PTEDUCAT %in% c('L2') & AV45 %in% c('L1','L2')"                             
## [11,] "AGE %in% c('L2') & PTEDUCAT %in% c('L2','L3') & AV45 %in% c('L1','L2') & HippoNV %in% c('L2','L3') & e4_1 %in% c('L2')"
## [12,] "AGE %in% c('L2') & PTEDUCAT %in% c('L3') & e4_1 %in% c('L1')"                                                          
## [13,] "PTEDUCAT %in% c('L1','L3') & e4_1 %in% c('L1') & rs11136000 %in% c('L1') & rs610932 %in% c('L1')"                      
## [14,] "AGE %in% c('L2') & HippoNV %in% c('L2','L3') & rs3865444 %in% c('L1')"                                                 
## [15,] "AGE %in% c('L1','L2') & AV45 %in% c('L1')"                                                                             
## [16,] "PTEDUCAT %in% c('L1','L3') & FDG %in% c('L2','L3') & HippoNV %in% c('L2','L3')"                                        
##       pred     
##  [1,] "class_1"
##  [2,] "class_1"
##  [3,] "class_0"
##  [4,] "class_1"
##  [5,] "class_1"
##  [6,] "class_1"
##  [7,] "class_1"
##  [8,] "class_0"
##  [9,] "class_0"
## [10,] "class_0"
## [11,] "class_0"
## [12,] "class_0"
## [13,] "class_0"
## [14,] "class_0"
## [15,] "class_0"
## [16,] "class_0"
print(cbind(ID = 1:nrow(rules.present), rules.present[, c("len", "freq", "err")]))
##       ID   len freq    err    
##  [1,] "1"  "2" "0.279" "0.083"
##  [2,] "2"  "2" "0.279" "0.09" 
##  [3,] "3"  "5" "0.029" "0.133"
##  [4,] "4"  "3" "0.122" "0.016"
##  [5,] "5"  "4" "0.031" "0.312"
##  [6,] "6"  "2" "0.207" "0.121"
##  [7,] "7"  "3" "0.172" "0.124"
##  [8,] "8"  "4" "0.06"  "0.194"
##  [9,] "9"  "5" "0.006" "0"    
## [10,] "10" "4" "0.044" "0.13" 
## [11,] "11" "5" "0.019" "0.2"  
## [12,] "12" "3" "0.043" "0.182"
## [13,] "13" "4" "0.037" "0.158"
## [14,] "14" "3" "0.114" "0.203"
## [15,] "15" "2" "0.234" "0.215"
## [16,] "16" "3" "0.282" "0.144"
freqPattern <- getFreqPattern(rules.pruned)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.01      1
##  maxlen target   ext
##       4  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 45 
## 
## set item appearances ...[2 item(s)] done [0.00s].
## set transactions ...[52 item(s), 4555 transaction(s)] done [0.00s].
## sorting and recoding items ... [50 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [93 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
top.pattern <- (freqPattern[which(as.numeric(freqPattern[, "len"]) >= 2), ][1:5, 
    ])
print(presentRules(top.pattern, colnames(X)))
##      len sup     conf   
## [1,] "2" "0.038" "1"    
## [2,] "2" "0.026" "1"    
## [3,] "2" "0.023" "0.991"
## [4,] "2" "0.022" "0.953"
## [5,] "2" "0.021" "0.99" 
##      condition                                            pred     
## [1,] "FDG %in% c('L2','L3') & HippoNV %in% c('L2','L3')"  "class_0"
## [2,] "AV45 %in% c('L1','L2') & HippoNV %in% c('L2','L3')" "class_0"
## [3,] "HippoNV %in% c('L1') & rs3818361 %in% c('L1')"      "class_1"
## [4,] "AV45 %in% c('L3') & HippoNV %in% c('L1')"           "class_1"
## [5,] "rs610932 %in% c('L1') & HippoNV %in% c('L2','L3')"  "class_0"
learner <- buildLearner(rules.selected, X, target)
learner.readable <- presentRules(learner, colnames(X))
print(cbind(ID = 1:nrow(learner.readable), learner.readable[, c("condition", 
    "pred")]))
##       ID  
##  [1,] "1" 
##  [2,] "2" 
##  [3,] "3" 
##  [4,] "4" 
##  [5,] "5" 
##  [6,] "6" 
##  [7,] "7" 
##  [8,] "8" 
##  [9,] "9" 
## [10,] "10"
## [11,] "11"
## [12,] "12"
##       condition                                                                                                            
##  [1,] "AGE %in% c('L3') & FDG %in% c('L1') & HippoNV %in% c('L1','L2')"                                                    
##  [2,] "FDG %in% c('L1','L2') & HippoNV %in% c('L1')"                                                                       
##  [3,] "AGE %in% c('L2') & PTEDUCAT %in% c('L3') & e4_1 %in% c('L1')"                                                       
##  [4,] "PTEDUCAT %in% c('L1','L3') & e4_1 %in% c('L1') & rs11136000 %in% c('L1') & rs610932 %in% c('L1')"                   
##  [5,] "AGE %in% c('L3') & PTGENDER %in% c('L1') & PTEDUCAT %in% c('L2') & AV45 %in% c('L1','L2')"                          
##  [6,] "PTGENDER %in% c('L2') & FDG %in% c('L2') & AV45 %in% c('L1','L2') & rs3818361 %in% c('L2') & rs3851179 %in% c('L2')"
##  [7,] "PTEDUCAT %in% c('L1','L3') & FDG %in% c('L2','L3') & HippoNV %in% c('L2','L3')"                                     
##  [8,] "AV45 %in% c('L1','L2') & HippoNV %in% c('L2') & e4_1 %in% c('L2') & rs3764650 %in% c('L1')"                         
##  [9,] "FDG %in% c('L1') & HippoNV %in% c('L1','L2')"                                                                       
## [10,] "AGE %in% c('L2') & HippoNV %in% c('L2','L3') & rs3865444 %in% c('L1')"                                              
## [11,] "AGE %in% c('L1','L2') & AV45 %in% c('L1')"                                                                          
## [12,] "Else"                                                                                                               
##       pred     
##  [1,] "class_1"
##  [2,] "class_1"
##  [3,] "class_0"
##  [4,] "class_0"
##  [5,] "class_0"
##  [6,] "class_0"
##  [7,] "class_0"
##  [8,] "class_0"
##  [9,] "class_1"
## [10,] "class_0"
## [11,] "class_0"
## [12,] "class_0"
print(cbind(ID = 1:nrow(learner.readable), learner.readable[, c("len", "freq", 
    "err")]))
##       ID   len freq                 err                 
##  [1,] "1"  "3" "0.121856866537718"  "0.0158730158730159"
##  [2,] "2"  "2" "0.195357833655706"  "0.118811881188119" 
##  [3,] "3"  "3" "0.034816247582205"  "0.0555555555555556"
##  [4,] "4"  "4" "0.02321083172147"   "0.0833333333333334"
##  [5,] "5"  "4" "0.0367504835589942" "0.105263157894737" 
##  [6,] "6"  "5" "0.0154738878143133" "0.125"             
##  [7,] "7"  "3" "0.2321083172147"    "0.158333333333333" 
##  [8,] "8"  "4" "0.0212765957446809" "0.181818181818182" 
##  [9,] "9"  "2" "0.0328820116054159" "0.0588235294117647"
## [10,] "10" "3" "0.0425531914893617" "0.181818181818182" 
## [11,] "11" "2" "0.0851063829787234" "0.204545454545455" 
## [12,] "12" "1" "0.158607350096712"  "0.317073170731707"